library(readr)
library(MASS)
library(car)
combined_data <- read_csv("compared_to_whom_final_data.csv")
combined_data$sample<-relevel(as.factor(combined_data$sample),ref="principal")
combined_data$condition<-relevel(as.factor(combined_data$condition),ref="control")

princ_data<-combined_data[combined_data$sample=="principal",]
genpop_data<-combined_data[combined_data$sample=="general population",]
stud_data<-combined_data[combined_data$sample=="student",]

###Histograms (Figure 1)
png("comhist.png",res=300,units="in",height=7,width=7)
par(mfrow=c(3,2))
par(oma=c(0,0,2,0))
par(mar=c(3,5,2,0))
hist(combined_data$est_performance[combined_data$condition=="control"],
     main="Control Group",col="grey75",xlab="Estimated Performance",cex.main=2)
text(20,95,paste("Mean=",round(mean(combined_data$est_performance[combined_data$condition=="control"],na.rm=T),2)),font=2,cex=1.5)
text(20,80,paste("SD=",round(sd(combined_data$est_performance[combined_data$condition=="control"],na.rm=T),2)),font=2,cex=1.5)
text(20,65,paste("N=",length(combined_data$est_performance[combined_data$condition=="control"])),font=2,cex=1.5)
plot(0,0,type="n",axes="F",ylab="",xlab="")
text(0,0,"Combined Sample",font=2,cex=2)
hist(combined_data$est_performance[combined_data$condition=="historic high"],
     main="Historic High",col="grey75",xlab="Estimated Performance",ylim=c(0,100),cex.main=2)
text(20,95,paste("Mean=",round(mean(combined_data$est_performance[combined_data$condition=="historic high"],na.rm=T),2)),font=2,cex=1.5)
text(20,80,paste("SD=",round(sd(combined_data$est_performance[combined_data$condition=="historic high"],na.rm=T),2)),font=2,cex=1.5)
text(20,65,paste("N=",length(combined_data$est_performance[combined_data$condition=="historic high"])),font=2,cex=1.5)
hist(combined_data$est_performance[combined_data$condition=="historic low"],
     main="Historic Low",col="grey75",xlab="Estimated Performance",ylim=c(0,60),cex.main=2)
text(85,60,paste("Mean=",round(mean(combined_data$est_performance[combined_data$condition=="historic low"],na.rm=T),2)),font=2,cex=1.5)
text(85,50,paste("SD=",round(sd(combined_data$est_performance[combined_data$condition=="historic low"],na.rm=T),2)),font=2,cex=1.5)
text(85,40,paste("N=",length(combined_data$est_performance[combined_data$condition=="historic low"])),font=2,cex=1.5)
hist(combined_data$est_performance[combined_data$condition=="social high"],
     main="Social High",col="grey75",xlab="Estimated Performance",ylim=c(0,120),cex.main=2)
text(20,115,paste("Mean=",round(mean(combined_data$est_performance[combined_data$condition=="social high"],na.rm=T),2)),font=2,cex=1.5)
text(20,95,paste("SD=",round(sd(combined_data$est_performance[combined_data$condition=="social high"],na.rm=T),2)),font=2,cex=1.5)
text(20,75,paste("N=",length(combined_data$est_performance[combined_data$condition=="social high"])),font=2,cex=1.5)
hist(combined_data$est_performance[combined_data$condition=="social low"],
     main="Social Low",col="grey75",xlab="Estimated Performance",cex.main=2)
text(80,55,paste("Mean=",round(mean(combined_data$est_performance[combined_data$condition=="social low"],na.rm=T),2)),font=2,cex=1.5)
text(80,45,paste("SD=",round(sd(combined_data$est_performance[combined_data$condition=="social low"],na.rm=T),2)),font=2,cex=1.5)
text(80,35,paste("N=",length(combined_data$est_performance[combined_data$condition=="social low"])),font=2,cex=1.5)
dev.off()

png("princhist.png",res=300,units="in",height=7,width=7)
par(mfrow=c(3,2))
par(oma=c(0,0,2,0))
par(mar=c(3,5,2,.5))
hist(combined_data$est_performance[combined_data$condition=="control"&combined_data$sample=="principal"],ylim=c(0,30),
     main="Control Group",col="grey75",xlab="Estimated Performance",cex.main=2)
text(35,30,paste("Mean=",round(mean(combined_data$est_performance[combined_data$condition=="control"&combined_data$sample=="principal"],na.rm=T),2)),font=2,cex=1.5)
text(35,25,paste("SD=",round(sd(combined_data$est_performance[combined_data$condition=="control"&combined_data$sample=="principal"],na.rm=T),2)),font=2,cex=1.5)
text(35,20,paste("N=",length(combined_data$est_performance[combined_data$condition=="control"&combined_data$sample=="principal"])),font=2,cex=1.5)
plot(0,0,type="n",axes="F",ylab="",xlab="")
text(0,0,"Principal Sample",font=2,cex=2)
hist(combined_data$est_performance[combined_data$condition=="historic high"&combined_data$sample=="principal"],main="Historic High",col="grey75",xlab="Estimated Performance",ylim=c(0,40),cex.main=2)
text(20,40,paste("Mean=",round(mean(combined_data$est_performance[combined_data$condition=="historic high"&combined_data$sample=="principal"],na.rm=T),2)),font=2,cex=1.5)
text(20,35,paste("SD=",round(sd(combined_data$est_performance[combined_data$condition=="historic high"&combined_data$sample=="principal"],na.rm=T),2)),font=2,cex=1.5)
text(20,30,paste("N=",length(combined_data$est_performance[combined_data$condition=="historic high"&combined_data$sample=="principal"])),font=2,cex=1.5)
hist(combined_data$est_performance[combined_data$condition=="historic low"&combined_data$sample=="principal"],main="Historic Low",col="grey75",xlab="Estimated Performance",ylim=c(0,25),cex.main=2)
text(20,25,paste("Mean=",round(mean(combined_data$est_performance[combined_data$condition=="historic low"&combined_data$sample=="principal"],na.rm=T),2)),font=2,cex=1.5)
text(20,22,paste("SD=",round(sd(combined_data$est_performance[combined_data$condition=="historic low"&combined_data$sample=="principal"],na.rm=T),2)),font=2,cex=1.5)
text(20,19,paste("N=",length(combined_data$est_performance[combined_data$condition=="historic low"&combined_data$sample=="principal"])),font=2,cex=1.5)
hist(combined_data$est_performance[combined_data$condition=="social high"&combined_data$sample=="principal"],main="Social High",col="grey75",xlab="Estimated Performance",ylim=c(0,40),cex.main=2)
text(22,38,paste("Mean=",round(mean(combined_data$est_performance[combined_data$condition=="social high"&combined_data$sample=="principal"],na.rm=T),2)),font=2,cex=1.5)
text(22,34,paste("SD=",round(sd(combined_data$est_performance[combined_data$condition=="social high"&combined_data$sample=="principal"],na.rm=T),2)),font=2,cex=1.5)
text(22,30,paste("N=",length(combined_data$est_performance[combined_data$condition=="social high"&combined_data$sample=="principal"])),font=2,cex=1.5)
hist(combined_data$est_performance[combined_data$condition=="social low"&combined_data$sample=="principal"],main="Social Low",col="grey75",xlab="Estimated Performance",ylim=c(0,30),xlim=c(0,95),cex.main=2)
text(20,30,paste("Mean=",round(mean(combined_data$est_performance[combined_data$condition=="social low"&combined_data$sample=="principal"],na.rm=T),2)),font=2,cex=1.5)
text(20,27,paste("SD=",round(sd(combined_data$est_performance[combined_data$condition=="social low"&combined_data$sample=="principal"],na.rm=T),2)),font=2,cex=1.5)
text(20,24,paste("N=",length(combined_data$est_performance[combined_data$condition=="social low"&combined_data$sample=="principal"])),font=2,cex=1.5)
dev.off()

png("studhist.png",res=300,units="in",height=7,width=7)
par(mfrow=c(3,2))
par(oma=c(0,0,2,0))
par(mar=c(3,5,2,.5))
hist(combined_data$est_performance[combined_data$condition=="control"&combined_data$sample=="student"],
     main="Control Group",col="grey75",xlab="Estimated Performance",ylim=c(0,30),xlim=c(0,100),cex.main=2)
text(20,29,paste("Mean=",round(mean(combined_data$est_performance[combined_data$condition=="control"&combined_data$sample=="student"],na.rm=T),2)),font=2,cex=1.5)
text(20,26,paste("SD=",round(sd(combined_data$est_performance[combined_data$condition=="control"&combined_data$sample=="student"],na.rm=T),2)),font=2,cex=1.5)
text(20,23,paste("N=",length(combined_data$est_performance[combined_data$condition=="control"&combined_data$sample=="student"])),font=2,cex=1.5)
plot(0,0,type="n",axes="F",ylab="",xlab="")
text(0,0,"Student Sample",font=2,cex=2)
hist(combined_data$est_performance[combined_data$condition=="historic high"&combined_data$sample=="student"],main="Historic High",col="grey75",xlab="Estimated Performance",cex.main=2)
text(42,29,paste("Mean=",round(mean(combined_data$est_performance[combined_data$condition=="historic high"&combined_data$sample=="student"],na.rm=T),2)),font=2,cex=1.5)
text(42,25,paste("SD=",round(sd(combined_data$est_performance[combined_data$condition=="historic high"&combined_data$sample=="student"],na.rm=T),2)),font=2,cex=1.5)
text(42,21,paste("N=",length(combined_data$est_performance[combined_data$condition=="historic high"&combined_data$sample=="student"])),font=2,cex=1.5)
hist(combined_data$est_performance[combined_data$condition=="historic low"&combined_data$sample=="student"],main="Historic Low",col="grey75",xlab="Estimated Performance",ylim=c(0,22),cex.main=2)
text(20,20,paste("Mean=",round(mean(combined_data$est_performance[combined_data$condition=="historic low"&combined_data$sample=="student"],na.rm=T),2)),font=2,cex=1.5)
text(20,17,paste("SD=",round(sd(combined_data$est_performance[combined_data$condition=="historic low"&combined_data$sample=="student"],na.rm=T),2)),font=2,cex=1.5)
text(20,14,paste("N=",length(combined_data$est_performance[combined_data$condition=="historic low"&combined_data$sample=="student"])),font=2,cex=1.5)
hist(combined_data$est_performance[combined_data$condition=="social high"&combined_data$sample=="student"],main="Social High",col="grey75",xlab="Estimated Performance",ylim=c(0,25),cex.main=2)
text(20,24,paste("Mean=",round(mean(combined_data$est_performance[combined_data$condition=="social high"&combined_data$sample=="student"],na.rm=T),2)),font=2,cex=1.5)
text(20,21.5,paste("SD=",round(sd(combined_data$est_performance[combined_data$condition=="social high"&combined_data$sample=="student"],na.rm=T),2)),font=2,cex=1.5)
text(20,19,paste("N=",length(combined_data$est_performance[combined_data$condition=="social high"&combined_data$sample=="student"])),font=2,cex=1.5)
hist(combined_data$est_performance[combined_data$condition=="social low"&combined_data$sample=="student"],main="Social Low",col="grey75",xlab="Estimated Performance",ylim=c(0,18),xlim=c(0,100),cex.main=2)
text(17,15,paste("Mean=",round(mean(combined_data$est_performance[combined_data$condition=="social low"&combined_data$sample=="student"],na.rm=T),2)),font=2,cex=1.5)
text(15,13,paste("SD=",round(sd(combined_data$est_performance[combined_data$condition=="social low"&combined_data$sample=="student"],na.rm=T),2)),font=2,cex=1.5)
text(15,11,paste("N=",length(combined_data$est_performance[combined_data$condition=="social low"&combined_data$sample=="student"])),font=2,cex=1.5)
dev.off()

png("mturkhist.png",res=300,units="in",height=7,width=7)
par(mfrow=c(3,2))
par(oma=c(0,0,2,0))
par(mar=c(3,5,2,.5))
hist(combined_data$est_performance[combined_data$condition=="control"&combined_data$sample=="general population"],
     main="Control Group",col="grey75",xlab="Estimated Performance",ylim=c(0,60),cex.main=2)
text(20,58,paste("Mean=",round(mean(combined_data$est_performance[combined_data$condition=="control"&combined_data$sample=="general population"],na.rm=T),2)),font=2,cex=1.5)
text(20,50,paste("SD=",round(sd(combined_data$est_performance[combined_data$condition=="control"&combined_data$sample=="general population"],na.rm=T),2)),font=2,cex=1.5)
text(20,42,paste("N=",length(combined_data$est_performance[combined_data$condition=="control"&combined_data$sample=="general population"])),font=2,cex=1.5)
plot(0,0,type="n",axes="F",ylab="",xlab="")
text(0,0,"MTurk Sample",font=2,cex=2)
hist(combined_data$est_performance[combined_data$condition=="historic high"&combined_data$sample=="general population"],main="Historic High",col="grey75",xlab="Estimated Performance",cex.main=2)
text(20,38,paste("Mean=",round(mean(combined_data$est_performance[combined_data$condition=="historic high"&combined_data$sample=="general population"],na.rm=T),2)),font=2,cex=1.5)
text(20,33,paste("SD=",round(sd(combined_data$est_performance[combined_data$condition=="historic high"&combined_data$sample=="general population"],na.rm=T),2)),font=2,cex=1.5)
text(20,28,paste("N=",length(combined_data$est_performance[combined_data$condition=="historic high"&combined_data$sample=="general population"])),font=2,cex=1.5)
hist(combined_data$est_performance[combined_data$condition=="historic low"&combined_data$sample=="general population"],main="Historic Low",col="grey75",xlab="Estimated Performance",ylim=c(0,30),cex.main=2)
text(80,29,paste("Mean=",round(mean(combined_data$est_performance[combined_data$condition=="historic low"&combined_data$sample=="general population"],na.rm=T),2)),font=2,cex=1.5)
text(80,25,paste("SD=",round(sd(combined_data$est_performance[combined_data$condition=="historic low"&combined_data$sample=="general population"],na.rm=T),2)),font=2,cex=1.5)
text(80,21,paste("N=",length(combined_data$est_performance[combined_data$condition=="historic low"&combined_data$sample=="general population"])),font=2,cex=1.5)
hist(combined_data$est_performance[combined_data$condition=="social high"&combined_data$sample=="general population"],main="Social High",col="grey75",xlab="Estimated Performance",ylim=c(0,60),cex.main=2)
text(20,58,paste("Mean=",round(mean(combined_data$est_performance[combined_data$condition=="social high"&combined_data$sample=="general population"],na.rm=T),2)),font=2,cex=1.5)
text(20,51,paste("SD=",round(sd(combined_data$est_performance[combined_data$condition=="social high"&combined_data$sample=="general population"],na.rm=T),2)),font=2,cex=1.5)
text(20,44,paste("N=",length(combined_data$est_performance[combined_data$condition=="social high"&combined_data$sample=="general population"])),font=2,cex=1.5)
hist(combined_data$est_performance[combined_data$condition=="social low"&combined_data$sample=="general population"],main="Social Low",col="grey75",xlab="Estimated Performance",xlim=c(0,100),cex.main=2)
text(80,29,paste("Mean=",round(mean(combined_data$est_performance[combined_data$condition=="social low"&combined_data$sample=="general population"],na.rm=T),2)),font=2,cex=1.5)
text(80,25,paste("SD=",round(sd(combined_data$est_performance[combined_data$condition=="social low"&combined_data$sample=="general population"],na.rm=T),2)),font=2,cex=1.5)
text(80,21,paste("N=",length(combined_data$est_performance[combined_data$condition=="social low"&combined_data$sample=="general population"])),font=2,cex=1.5)
dev.off()

###Regression Models - Table 2
lm_full<-lm(est_performance~condition+sample+white+male+ideology+age,data=combined_data)
summary(lm_full)
lm_principals<-lm(est_performance~condition+white+male+ideology+age,data=combined_data[combined_data$sample=="principal",])
summary(lm_principals)
lm_students<-lm(est_performance~condition+white+male+ideology+age,data=combined_data[combined_data$sample=="student",])
summary(lm_students)
lm_genpop<-lm(est_performance~condition+white+male+ideology+age,data=combined_data[combined_data$sample=="general population",])
summary(lm_genpop)

###Regression Models - Table 3
lm_controls_int<-lm(est_performance~condition*sample+white+male+ideology+age,data=combined_data)
summary(lm_controls_int)

###Figure 2 - Predicted Values Plot
##Generating Predicted Values
simprinc.dat<-data.frame(condition=c("control","historic high","historic low","social high","social low"),
                    sample=c("principal"),
                    age=mean(combined_data[combined_data$sample=="principal",]$age,na.rm=T),
                    white=1,
                    male=1,
                    ideology=mean(combined_data[combined_data$sample=="principal",]$ideology,na.rm=T))
simstud.dat<-data.frame(condition=c("control","historic high","historic low","social high","social low"),
                         sample=c("student"),
                         age=mean(combined_data[combined_data$sample=="student",]$age,na.rm=T),
                         white=1,
                         male=1,
                         ideology=mean(combined_data[combined_data$sample=="student",]$ideology,na.rm=T))
simgenpop.dat<-data.frame(condition=c("control","historic high","historic low","social high","social low"),
                         sample=c("general population"),
                         age=mean(combined_data[combined_data$sample=="general population",]$age,na.rm=T),
                         white=1,
                         male=1,
                         ideology=mean(combined_data[combined_data$sample=="general population",]$ideology,na.rm=T))    

predprinc.dat<-data.frame(predict(lm_principals,simprinc.dat,interval = "confidence"))
predstud.dat<-data.frame(predict(lm_students,simstud.dat,interval = "confidence"))
predgenpop.dat<-data.frame(predict(lm_genpop,simgenpop.dat,interval = "confidence"))

pred.dat<-data.frame(rbind(predprinc.dat,predstud.dat,predgenpop.dat))

##Plotting Predicted Values
png("preds.png",res=300,units="in",height=5,width=8)
par(mar=c(4.1,4.1,1,8.1))
plot(c(0,20,40,60,80),predprinc.dat$fit,pch=19,
     ylab="Estimated Performance",xlab="",
     col=c("grey50")
     ,xlim=c(0,86),ylim=c(45,85),
     axes=F)
box()
axis(2)
points(c(3,23,43,63,83),predstud.dat$fit,pch=19,col="royalblue3")
points(c(6,26,46,66,86),predgenpop.dat$fit,pch=19,col="darkred")
axis(1,at=c(0,20,40,60,80),c("Control","Historic High","Historic Low","Social High","Social Low"))
segments(c(0,20,40,60,80),
         predprinc.dat$lwr,
         c(0,20,40,60,80), 
         predprinc.dat$upr,
         lty=1,lwd=2,col=c("grey50"))
segments(c(3,23,43,63,83),
         predstud.dat$lwr,
         c(3,23,43,63,83), 
         predstud.dat$upr,
         lty=1,lwd=2,col=c("royalblue3"))
segments(c(6,26,46,66,86),
         predgenpop.dat$lwr,
         c(6,26,46,66,86), 
         predgenpop.dat$upr,
         lty=1,lwd=2,col=c("darkred"))
legend(x="topright", legend=c("Principal","Student","Mturk"), title="Sample", bty="n", 
       lwd=2,col=c("grey75","royalblue3","darkred"), , inset=c(-.25, 0), xpd=TRUE)
dev.off()

###Randomization Checks (Appendix)
prage<-aov(age~condition,data=princ_data)
summary(prage)
prwhite = table(princ_dat1$white, princ_dat1$condition) 
chisq.test(prwhite)
prmale = table(princ_dat1$male, princ_dat1$condition) 
chisq.test(prmale)
prideol<-aov(ideology~condition,data=princ_data)
summary(prideol)

mtage<-aov(age~condition,data=genpop_data)
summary(mtage)
gnwhite = table(genpop_data$white,genpop_data$condition) 
chisq.test(gnwhite)
gnmale = table(genpop_data$male, genpop_data$condition) 
chisq.test(gnmale)
gnideol<-aov(ideology~condition,data=genpop_data)
summary(gnideol)

stage<-aov(age~condition,data=stud_data)
summary(stage)
stwhite = table(stud_data$white,stud_data$condition) 
chisq.test(stwhite)
stmale = table(stud_data$male, stud_data$condition) 
chisq.test(stmale)
stideol<-aov(ideology~condition,data=stud_data)
summary(stideol)

###ANOVA Models (Appendix)
anova_twoway<-aov(est_performance~condition+sample,data=combined_data)
anova_table1<-Anova(anova_twoway, type="II")
anova_twoway_int<-aov(est_performance~condition*sample,data=combined_data)
anova_table2<-Anova(anova_twoway_int, type="II")
anova_twoway_intcontrols<-aov(est_performance~condition*sample+white+male+ideology+age,data=combined_data)
anova_table3<-Anova(anova_twoway_intcontrols, type="II")